Abstract

OBJECTIVE :

The purpose of this model is to analyse the pattern and spread of the COVID-19 from January 2020 onwards. A variety of packages were used for this exercise.

#install.packages("kableExtra")
suppressMessages(library(magrittr)) # pipe operations
suppressMessages(library(lubridate)) # date operations
suppressMessages(library(tidyverse)) # ggplot2, tidyr, dplyr...
suppressMessages(library(gridExtra)) # multiple grid-based plots on a page
suppressMessages(library(ggforce)) # accelerating ggplot2
suppressMessages(library(kableExtra)) # complex tables
suppressMessages(library(leaflet)) #for map
suppressMessages(library(plotly)) #plotly

Data Ingestion :

Reading data from the COVID-19 folder which gets updated everyday. It contains data for the whole world.

confirmed <- read.csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/cd5d1b9eacb96a2b598cb0e6fb2a3145978df4d0/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")

death <- read.csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/cd5d1b9eacb96a2b598cb0e6fb2a3145978df4d0/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv")

recovered <- read.csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/cd5d1b9eacb96a2b598cb0e6fb2a3145978df4d0/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv")

Data Cleaning,Manipulation and Visualisation

Data Cleaning

Verifying the data and changing the data into the desirable format.

confirmed[1:10, 1:10]
##                  Province.State      Country.Region      Lat     Long X1.22.20
## 1                                       Afghanistan  33.0000  65.0000        0
## 2                                           Albania  41.1533  20.1683        0
## 3                                           Algeria  28.0339   1.6596        0
## 4                                           Andorra  42.5063   1.5218        0
## 5                                            Angola -11.2027  17.8739        0
## 6                               Antigua and Barbuda  17.0608 -61.7964        0
## 7                                         Argentina -38.4161 -63.6167        0
## 8                                           Armenia  40.0691  45.0382        0
## 9  Australian Capital Territory           Australia -35.4735 149.0124        0
## 10              New South Wales           Australia -33.8688 151.2093        0
##    X1.23.20 X1.24.20 X1.25.20 X1.26.20 X1.27.20
## 1         0        0        0        0        0
## 2         0        0        0        0        0
## 3         0        0        0        0        0
## 4         0        0        0        0        0
## 5         0        0        0        0        0
## 6         0        0        0        0        0
## 7         0        0        0        0        0
## 8         0        0        0        0        0
## 9         0        0        0        0        0
## 10        0        0        0        3        4
col <- ncol(confirmed)
## get dates from column names
dates <- names(confirmed)[5:col] %>% substr(2,8) %>% mdy()
range(dates)
## [1] "2020-01-22" "2020-03-25"
## [1] "2020-01-22" "2020-03-22"
min_date <- min(dates)
max_date <- max(dates)
min_date_formt <- min_date %>% format('%d %b %Y')
max_date_formt <- max_date %>% format('%d %b %Y')

Data Manipulation

Data Cleaning , Manipulation and Visualisation was performed. We can see the bigger radius shows a few countried which have the highest number of people affected by COVID-19.

cleanData <- function(data) {
## remove some columns
data %<>% select(-c(Province.State, Lat, Long)) %>% rename(country=Country.Region)
## convert from wide to long format
data %<>% gather(key=date, value=count, -country)
## convert from character to date
data %<>% mutate(date = date %>% substr(2,8) %>% mdy())
## aggregate by country
data %<>% group_by(country, date) %>% summarise(count=sum(count, na.rm=T)) %>% as.data.frame()
return(data)
}

data_confirmed <- confirmed %>% cleanData() %>% rename(confirmed=count)
data_deaths <- death %>% cleanData() %>% rename(deaths=count)
data_recovered <- recovered %>% cleanData() %>% rename(recovered=count)

## merge above 3 datasets into one, by country and date
data <- data_confirmed %>% merge(data_deaths) %>% merge(data_recovered)

## countries/regions with confirmed cases, excl. cruise ships
countries <- data %>% pull(country) %>% setdiff('Cruise Ship')

## first 10 records when it first broke out in India
Ind <- data %>% filter(country=='India')
p <-ggplot(data= Ind, mapping = aes(x= date, y= confirmed)) + geom_bar(stat= "identity", fill = "#56B4E9")
#ggplotly(p)

p1 <- ggplot(data= Ind, mapping = aes(x= date, y= deaths)) + geom_bar(stat= "identity", fill = "#CC0000")
#ggplotly(p1)

p2 <- ggplot(data= Ind, mapping = aes(x= date, y= recovered)) + geom_bar(stat= "identity", fill = "#00FF00")
#ggplotly(p2)
subplot(p, p1, p2, margin = 0.1, nrows = 3, titleX = TRUE)

Map

Visualizing data in the form of Map

## counts for the whole world
data_world <- data %>% group_by(date) %>%
summarise(country='World',
confirmed = sum(confirmed),
deaths = sum(deaths),
recovered = sum(recovered))
data %<>% rbind(data_world)
## current confirmed cases
data %<>% mutate(current_confirmed = confirmed - deaths - recovered)
## select last column, which is the number of latest confirmed cases
x <- confirmed
x$confirmed <- x[, ncol(x)]
x %<>% select(c(Country.Region, Province.State, Lat, Long, confirmed)) %>%
mutate(txt=paste0(Country.Region, ' - ', Province.State, ': ', confirmed))
m <- leaflet(width=1200, height=800) %>% addTiles()
# circle marker (units in pixels)
m %<>% addCircleMarkers(x$Long, x$Lat,
radius=2+log2(x$confirmed), stroke=F,
color='red', fillOpacity=0.3,
popup=x$txt)
# world
m

References :

COVID-19 Data Analysis with R - Worldwide : Yanchang Zhao,,http:// RDataMining.com

Data was taken from https://github.com/CSSEGISandData

LS0tCnRpdGxlOiAiQ09WSUQtMTkiCmF1dGhvcjogIkt1bWFyaSBTdW5kYXJhbSIKZGF0ZTogImByIGZvcm1hdChTeXMudGltZSgpLCAnJVggJWQgJUIsICVZJylgIgpvdXRwdXQ6IAogIGh0bWxfZG9jdW1lbnQ6CiAgICBmaWdfd2lkdGg6IDYgCiAgICBmaWdfaGVpZ2h0OiA0CiAgICBjb2RlX2ZvbGRpbmc6IGhpZGUKICAgIGNvZGVfZG93bmxvYWQgOiB0cnVlCi0tLQoKYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9CmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVFJVRSkKYGBgCgojIHsudGFic2V0IC50YWJzZXQtZmFkZSAudGFic2V0LXBpbGxzfQoKIyMgX19BYnN0cmFjdF9fCgoqKk9CSkVDVElWRSoqIDoKCj5UaGUgcHVycG9zZSBvZiB0aGlzIG1vZGVsIGlzIHRvIGFuYWx5c2UgdGhlIHBhdHRlcm4gYW5kIHNwcmVhZCBvZiB0aGUgQ09WSUQtMTkgZnJvbSBKYW51YXJ5IDIwMjAgb253YXJkcy4gQSB2YXJpZXR5IG9mIHBhY2thZ2VzIHdlcmUgdXNlZCBmb3IgdGhpcyBleGVyY2lzZS4KCmBgYHtyfQojaW5zdGFsbC5wYWNrYWdlcygia2FibGVFeHRyYSIpCnN1cHByZXNzTWVzc2FnZXMobGlicmFyeShtYWdyaXR0cikpICMgcGlwZSBvcGVyYXRpb25zCnN1cHByZXNzTWVzc2FnZXMobGlicmFyeShsdWJyaWRhdGUpKSAjIGRhdGUgb3BlcmF0aW9ucwpzdXBwcmVzc01lc3NhZ2VzKGxpYnJhcnkodGlkeXZlcnNlKSkgIyBnZ3Bsb3QyLCB0aWR5ciwgZHBseXIuLi4Kc3VwcHJlc3NNZXNzYWdlcyhsaWJyYXJ5KGdyaWRFeHRyYSkpICMgbXVsdGlwbGUgZ3JpZC1iYXNlZCBwbG90cyBvbiBhIHBhZ2UKc3VwcHJlc3NNZXNzYWdlcyhsaWJyYXJ5KGdnZm9yY2UpKSAjIGFjY2VsZXJhdGluZyBnZ3Bsb3QyCnN1cHByZXNzTWVzc2FnZXMobGlicmFyeShrYWJsZUV4dHJhKSkgIyBjb21wbGV4IHRhYmxlcwpzdXBwcmVzc01lc3NhZ2VzKGxpYnJhcnkobGVhZmxldCkpICNmb3IgbWFwCnN1cHByZXNzTWVzc2FnZXMobGlicmFyeShwbG90bHkpKSAjcGxvdGx5CmBgYAoKKipEYXRhIEluZ2VzdGlvbioqIDoKCj5SZWFkaW5nIGRhdGEgZnJvbSB0aGUgQ09WSUQtMTkgZm9sZGVyIHdoaWNoIGdldHMgdXBkYXRlZCBldmVyeWRheS4gSXQgY29udGFpbnMgZGF0YSBmb3IgdGhlIHdob2xlIHdvcmxkLgpgYGB7cn0KY29uZmlybWVkIDwtIHJlYWQuY3N2KCJodHRwczovL3Jhdy5naXRodWJ1c2VyY29udGVudC5jb20vQ1NTRUdJU2FuZERhdGEvQ09WSUQtMTkvY2Q1ZDFiOWVhY2I5NmEyYjU5OGNiMGU2ZmIyYTMxNDU5NzhkZjRkMC9jc3NlX2NvdmlkXzE5X2RhdGEvY3NzZV9jb3ZpZF8xOV90aW1lX3Nlcmllcy90aW1lX3Nlcmllc19jb3ZpZDE5X2NvbmZpcm1lZF9nbG9iYWwuY3N2IikKCmRlYXRoIDwtIHJlYWQuY3N2KCJodHRwczovL3Jhdy5naXRodWJ1c2VyY29udGVudC5jb20vQ1NTRUdJU2FuZERhdGEvQ09WSUQtMTkvY2Q1ZDFiOWVhY2I5NmEyYjU5OGNiMGU2ZmIyYTMxNDU5NzhkZjRkMC9jc3NlX2NvdmlkXzE5X2RhdGEvY3NzZV9jb3ZpZF8xOV90aW1lX3Nlcmllcy90aW1lX3Nlcmllc19jb3ZpZDE5X2RlYXRoc19nbG9iYWwuY3N2IikKCnJlY292ZXJlZCA8LSByZWFkLmNzdigiaHR0cHM6Ly9yYXcuZ2l0aHVidXNlcmNvbnRlbnQuY29tL0NTU0VHSVNhbmREYXRhL0NPVklELTE5L2NkNWQxYjllYWNiOTZhMmI1OThjYjBlNmZiMmEzMTQ1OTc4ZGY0ZDAvY3NzZV9jb3ZpZF8xOV9kYXRhL2Nzc2VfY292aWRfMTlfdGltZV9zZXJpZXMvdGltZV9zZXJpZXNfY292aWQxOV9yZWNvdmVyZWRfZ2xvYmFsLmNzdiIpCgpgYGAKIyB7LnRhYnNldCAudGFic2V0LWZhZGUgLnRhYnNldC1waWxsc30KIyMgX19EYXRhIENsZWFuaW5nLE1hbmlwdWxhdGlvbiBhbmQgVmlzdWFsaXNhdGlvbl9fIAojIyMgey50YWJzZXQgLnRhYnNldC1mYWRlfQojIyMjIF9fRGF0YSBDbGVhbmluZ19fIApWZXJpZnlpbmcgdGhlIGRhdGEgYW5kIGNoYW5naW5nIHRoZSBkYXRhIGludG8gdGhlIGRlc2lyYWJsZSBmb3JtYXQuCmBgYHtyfQpjb25maXJtZWRbMToxMCwgMToxMF0KY29sIDwtIG5jb2woY29uZmlybWVkKQojIyBnZXQgZGF0ZXMgZnJvbSBjb2x1bW4gbmFtZXMKZGF0ZXMgPC0gbmFtZXMoY29uZmlybWVkKVs1OmNvbF0gJT4lIHN1YnN0cigyLDgpICU+JSBtZHkoKQpyYW5nZShkYXRlcykKIyMgWzFdICIyMDIwLTAxLTIyIiAiMjAyMC0wMy0yMiIKbWluX2RhdGUgPC0gbWluKGRhdGVzKQptYXhfZGF0ZSA8LSBtYXgoZGF0ZXMpCm1pbl9kYXRlX2Zvcm10IDwtIG1pbl9kYXRlICU+JSBmb3JtYXQoJyVkICViICVZJykKbWF4X2RhdGVfZm9ybXQgPC0gbWF4X2RhdGUgJT4lIGZvcm1hdCgnJWQgJWIgJVknKQpgYGAKCgoKIyMjIyBfX0RhdGEgTWFuaXB1bGF0aW9uX18gCgpEYXRhIENsZWFuaW5nICwgTWFuaXB1bGF0aW9uIGFuZCBWaXN1YWxpc2F0aW9uIHdhcyBwZXJmb3JtZWQuIFdlIGNhbiBzZWUgdGhlIGJpZ2dlciByYWRpdXMgc2hvd3MgYSBmZXcgY291bnRyaWVkIHdoaWNoIGhhdmUgdGhlIGhpZ2hlc3QgbnVtYmVyIG9mIHBlb3BsZSBhZmZlY3RlZCBieSBDT1ZJRC0xOS4KYGBge3IsIHdhcm5pbmc9RkFMU0V9CmNsZWFuRGF0YSA8LSBmdW5jdGlvbihkYXRhKSB7CiMjIHJlbW92ZSBzb21lIGNvbHVtbnMKZGF0YSAlPD4lIHNlbGVjdCgtYyhQcm92aW5jZS5TdGF0ZSwgTGF0LCBMb25nKSkgJT4lIHJlbmFtZShjb3VudHJ5PUNvdW50cnkuUmVnaW9uKQojIyBjb252ZXJ0IGZyb20gd2lkZSB0byBsb25nIGZvcm1hdApkYXRhICU8PiUgZ2F0aGVyKGtleT1kYXRlLCB2YWx1ZT1jb3VudCwgLWNvdW50cnkpCiMjIGNvbnZlcnQgZnJvbSBjaGFyYWN0ZXIgdG8gZGF0ZQpkYXRhICU8PiUgbXV0YXRlKGRhdGUgPSBkYXRlICU+JSBzdWJzdHIoMiw4KSAlPiUgbWR5KCkpCiMjIGFnZ3JlZ2F0ZSBieSBjb3VudHJ5CmRhdGEgJTw+JSBncm91cF9ieShjb3VudHJ5LCBkYXRlKSAlPiUgc3VtbWFyaXNlKGNvdW50PXN1bShjb3VudCwgbmEucm09VCkpICU+JSBhcy5kYXRhLmZyYW1lKCkKcmV0dXJuKGRhdGEpCn0KCmRhdGFfY29uZmlybWVkIDwtIGNvbmZpcm1lZCAlPiUgY2xlYW5EYXRhKCkgJT4lIHJlbmFtZShjb25maXJtZWQ9Y291bnQpCmRhdGFfZGVhdGhzIDwtIGRlYXRoICU+JSBjbGVhbkRhdGEoKSAlPiUgcmVuYW1lKGRlYXRocz1jb3VudCkKZGF0YV9yZWNvdmVyZWQgPC0gcmVjb3ZlcmVkICU+JSBjbGVhbkRhdGEoKSAlPiUgcmVuYW1lKHJlY292ZXJlZD1jb3VudCkKCiMjIG1lcmdlIGFib3ZlIDMgZGF0YXNldHMgaW50byBvbmUsIGJ5IGNvdW50cnkgYW5kIGRhdGUKZGF0YSA8LSBkYXRhX2NvbmZpcm1lZCAlPiUgbWVyZ2UoZGF0YV9kZWF0aHMpICU+JSBtZXJnZShkYXRhX3JlY292ZXJlZCkKCiMjIGNvdW50cmllcy9yZWdpb25zIHdpdGggY29uZmlybWVkIGNhc2VzLCBleGNsLiBjcnVpc2Ugc2hpcHMKY291bnRyaWVzIDwtIGRhdGEgJT4lIHB1bGwoY291bnRyeSkgJT4lIHNldGRpZmYoJ0NydWlzZSBTaGlwJykKCiMjIGZpcnN0IDEwIHJlY29yZHMgd2hlbiBpdCBmaXJzdCBicm9rZSBvdXQgaW4gSW5kaWEKSW5kIDwtIGRhdGEgJT4lIGZpbHRlcihjb3VudHJ5PT0nSW5kaWEnKQpwIDwtZ2dwbG90KGRhdGE9IEluZCwgbWFwcGluZyA9IGFlcyh4PSBkYXRlLCB5PSBjb25maXJtZWQpKSArIGdlb21fYmFyKHN0YXQ9ICJpZGVudGl0eSIsIGZpbGwgPSAiIzU2QjRFOSIpCiNnZ3Bsb3RseShwKQoKcDEgPC0gZ2dwbG90KGRhdGE9IEluZCwgbWFwcGluZyA9IGFlcyh4PSBkYXRlLCB5PSBkZWF0aHMpKSArIGdlb21fYmFyKHN0YXQ9ICJpZGVudGl0eSIsIGZpbGwgPSAiI0NDMDAwMCIpCiNnZ3Bsb3RseShwMSkKCnAyIDwtIGdncGxvdChkYXRhPSBJbmQsIG1hcHBpbmcgPSBhZXMoeD0gZGF0ZSwgeT0gcmVjb3ZlcmVkKSkgKyBnZW9tX2JhcihzdGF0PSAiaWRlbnRpdHkiLCBmaWxsID0gIiMwMEZGMDAiKQojZ2dwbG90bHkocDIpCnN1YnBsb3QocCwgcDEsIHAyLCBtYXJnaW4gPSAwLjEsIG5yb3dzID0gMywgdGl0bGVYID0gVFJVRSkKYGBgCgoKIyMjIyBfX01hcF9fIApWaXN1YWxpemluZyBkYXRhIGluIHRoZSBmb3JtIG9mIE1hcApgYGB7ciwgd2FybmluZz1GQUxTRX0KIyMgY291bnRzIGZvciB0aGUgd2hvbGUgd29ybGQKZGF0YV93b3JsZCA8LSBkYXRhICU+JSBncm91cF9ieShkYXRlKSAlPiUKc3VtbWFyaXNlKGNvdW50cnk9J1dvcmxkJywKY29uZmlybWVkID0gc3VtKGNvbmZpcm1lZCksCmRlYXRocyA9IHN1bShkZWF0aHMpLApyZWNvdmVyZWQgPSBzdW0ocmVjb3ZlcmVkKSkKZGF0YSAlPD4lIHJiaW5kKGRhdGFfd29ybGQpCiMjIGN1cnJlbnQgY29uZmlybWVkIGNhc2VzCmRhdGEgJTw+JSBtdXRhdGUoY3VycmVudF9jb25maXJtZWQgPSBjb25maXJtZWQgLSBkZWF0aHMgLSByZWNvdmVyZWQpCmBgYApgYGB7cix3YXJuaW5nPUZBTFNFfQojIyBzZWxlY3QgbGFzdCBjb2x1bW4sIHdoaWNoIGlzIHRoZSBudW1iZXIgb2YgbGF0ZXN0IGNvbmZpcm1lZCBjYXNlcwp4IDwtIGNvbmZpcm1lZAp4JGNvbmZpcm1lZCA8LSB4WywgbmNvbCh4KV0KeCAlPD4lIHNlbGVjdChjKENvdW50cnkuUmVnaW9uLCBQcm92aW5jZS5TdGF0ZSwgTGF0LCBMb25nLCBjb25maXJtZWQpKSAlPiUKbXV0YXRlKHR4dD1wYXN0ZTAoQ291bnRyeS5SZWdpb24sICcgLSAnLCBQcm92aW5jZS5TdGF0ZSwgJzogJywgY29uZmlybWVkKSkKbSA8LSBsZWFmbGV0KHdpZHRoPTEyMDAsIGhlaWdodD04MDApICU+JSBhZGRUaWxlcygpCiMgY2lyY2xlIG1hcmtlciAodW5pdHMgaW4gcGl4ZWxzKQptICU8PiUgYWRkQ2lyY2xlTWFya2Vycyh4JExvbmcsIHgkTGF0LApyYWRpdXM9Mitsb2cyKHgkY29uZmlybWVkKSwgc3Ryb2tlPUYsCmNvbG9yPSdyZWQnLCBmaWxsT3BhY2l0eT0wLjMsCnBvcHVwPXgkdHh0KQojIHdvcmxkCm0KYGBgCioqUmVmZXJlbmNlcyoqIDoKCj5DT1ZJRC0xOSBEYXRhIEFuYWx5c2lzIHdpdGggUiAtIFdvcmxkd2lkZSA6IFlhbmNoYW5nIFpoYW8seWFuY2hhbmdAUkRhdGFNaW5pbmcuY29tLGh0dHA6Ly8gUkRhdGFNaW5pbmcuY29tCgo+RGF0YSB3YXMgdGFrZW4gZnJvbSBodHRwczovL2dpdGh1Yi5jb20vQ1NTRUdJU2FuZERhdGEK